import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.io as pio
import plotly.express as px
import ipywidgets as widgets
# Notebook display settings: print up to 200 characters per cell and never
# hide columns when a DataFrame is rendered.
pd.set_option('display.max_columns', None)
pd.set_option('display.max_colwidth', 200)
# Load the Compustat extract used throughout the notebook.
df = pd.read_csv("data/compustat_from_1990.csv")

# One-digit industry code: the thousands digit of the SIC code in `sich`,
# stored as a string so it can be used as a categorical colour later.
df['sic1'] = (df['sich'] / 1000).astype(int).astype(str)

# Keep only the columns used below. The explicit copy makes `df` an
# independent frame, so the column assignments that follow cannot raise
# SettingWithCopyWarning or write into a temporary slice.
df = df[['conm', 'gvkey', 'tic', 'fyear', 'at', 'ni', 'sale', 'mv', 'sic1', 'sich']].copy()

# shift(1) takes the previous *row* within each firm, so order the rows by
# firm and fiscal year first instead of relying on the CSV's row order.
df = df.sort_values(['gvkey', 'fyear'])
df['lag_at'] = df.groupby('gvkey')['at'].shift(1)

# ROA = `ni` divided by the previous row's `at` (presumably net income over
# lagged total assets -- confirm against the Compustat data dictionary).
# The first row of every firm has no lag and therefore gets NaN.
df['ROA'] = df['ni'] / df['lag_at']

print(df.shape)
df.head(3)
(211983, 12)
| conm | gvkey | tic | fyear | at | ni | sale | mv | sic1 | sich | lag_at | ROA | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | AAR CORP | 1004 | AIR | 1990 | 379.958 | 14.801 | 466.542 | 224.460360 | 5 | 5080.0 | NaN | NaN |
| 1 | AAR CORP | 1004 | AIR | 1991 | 395.351 | 10.020 | 422.657 | 204.699595 | 5 | 5080.0 | 379.958 | 0.026371 |
| 2 | AAR CORP | 1004 | AIR | 1992 | 365.151 | 0.283 | 382.780 | 214.663477 | 5 | 5080.0 | 395.351 | 0.000716 |
Reference for SIC (Standard Industrial Classification) codes: https://en.wikipedia.org/wiki/Standard_Industrial_Classification
# Sample restrictions: complete rows only, |ROA| below 0.5, and strictly
# positive sale, at and mv.
df = df.dropna()
keep_row = (
    (df['ROA'].abs() < 0.5)
    & (df['sale'] > 0)
    & (df['at'] > 0)
    & (df['mv'] > 0)
)
df = df[keep_row]
df.shape
(143046, 12)
# Per-firm summary: N = number of rows for the firm, ind_count = number of
# distinct one-digit industry codes (sic1) the firm appears under.
firm_stats = df.groupby('gvkey').agg({'gvkey': 'count', 'sic1': 'nunique'})
firm_stats = firm_stats.rename(columns={'gvkey': 'N', 'sic1': 'ind_count'})
df_N = firm_stats.reset_index()
df_N.head()
| gvkey | N | ind_count | |
|---|---|---|---|
| 0 | 1004 | 30 | 1 |
| 1 | 1009 | 4 | 1 |
| 2 | 1011 | 4 | 1 |
| 3 | 1013 | 19 | 1 |
| 4 | 1017 | 4 | 1 |
# Firms that have the largest row count found in the sample and that stay in
# a single one-digit industry.
longest_history = df_N['N'].max()
single_industry = df_N['ind_count'] == 1
df_to_filter = df_N[single_industry & (df_N['N'] == longest_history)]
balance_gvkey_list = df_to_filter['gvkey'].tolist()
len(balance_gvkey_list)
585
# Keep only rows belonging to the firms selected in balance_gvkey_list.
in_balanced_panel = df['gvkey'].isin(balance_gvkey_list)
df = df[in_balanced_panel]
df.shape
(17550, 12)
# Bar chart of the number of rows per fiscal year, in chronological order.
df['fyear'].value_counts().sort_index().plot.bar()
<AxesSubplot:>
The bar chart shows the same number of observations in every fiscal year, which confirms that the panel is balanced.
# Rank firms by their fiscal-year-2020 `mv` (presumably market value -- confirm)
# and keep the 100 largest.
df_2020 = df[df.fyear == 2020]
gvkey_100_list = list(df_2020.sort_values('mv', ascending=False).head(100)['gvkey'])

# Explicit copy: the plotting cells below add a `bubble_size` column to `df`,
# which would otherwise be an assignment on a filtered slice and trigger
# SettingWithCopyWarning.
df = df[df.gvkey.isin(gvkey_100_list)].copy()
df.shape
(3000, 12)
# Animated bubble chart: one frame per fiscal year, one bubble per company.
xcol = 'sale'
ycol = 'ROA'

# Define size variable to be used as a bubble size.
# Values below the 30th percentile are raised to it so the smallest bubbles
# remain visible.
bubble_size_base = 'mv'
size_floor = df[bubble_size_base].quantile(0.3)
df['bubble_size'] = df[bubble_size_base].clip(lower=size_floor)

# Fix both axis ranges over the whole sample so the axes do not rescale
# between animation frames.
x_limits = [df[xcol].min(), df[xcol].max()]
y_limits = [df[ycol].min(), df[ycol].max()]

fig = px.scatter(
    df,
    x=xcol,
    y=ycol,
    animation_frame='fyear',
    animation_group='conm',
    height=600,
    width=800,
    range_x=x_limits,
    range_y=y_limits,
    size='bubble_size',
    size_max=50,
    log_x=True,
    color='sic1',
    hover_name='conm',
)

chart_title = f"Dynamic Scatter Plots: {xcol} & {ycol} with {bubble_size_base} as a bubble size"
fig.update_layout(
    title=dict(text=chart_title, x=0.5, xanchor='center', yanchor='top')
)
fig.show()
# Animated bubble chart of mv against at (both on log scales), also written
# to a standalone HTML file.
xcol = 'mv'
ycol = 'at'

# Define size variable to be used as a bubble size.
# Sizes are floored at the 30th percentile of the base variable.
bubble_size_base = 'mv'
base_values = df[bubble_size_base]
df['bubble_size'] = base_values.clip(lower=base_values.quantile(0.3))

scatter_options = dict(
    x=xcol,
    y=ycol,
    animation_frame='fyear',
    animation_group='conm',
    height=600,
    width=800,
    # Constant axis ranges across all animation frames.
    range_x=[df[xcol].min(), df[xcol].max()],
    range_y=[df[ycol].min(), df[ycol].max()],
    size='bubble_size',
    size_max=50,
    log_x=True,  # Log transformation for x
    log_y=True,  # Log transformation for y
    color='sic1',
    hover_name='conm',
)
fig = px.scatter(df, **scatter_options)

title_settings = {
    'text': f"Dynamic Scatter Plots: {xcol} & {ycol} with {bubble_size_base} as a bubble size",
    'x': 0.5,
    'xanchor': 'center',
    'yanchor': 'top',
}
fig.update_layout(title=title_settings)
fig.show()

# auto_play=False: the saved page opens on the first frame and waits for the
# user to start the animation.
fig.write_html('dynamic-scatter-mv-at.html', auto_play=False)
# IPython shell escape: run nbconvert to export this notebook to HTML.
! jupyter nbconvert compustat-bubble-plot-animation.ipynb --to html
[NbConvertApp] Converting notebook compustat-bubble-plot-animation.ipynb to html [NbConvertApp] Writing 1288453 bytes to compustat-bubble-plot-animation.html
# Animated bubble chart of mv against at with log-scaled axes; one frame per
# fiscal year and one bubble per company.
xcol = 'mv'
ycol = 'at'

# Define size variable to be used as a bubble size.
bubble_size_base = 'mv'
lower_bound = df[bubble_size_base].quantile(0.3)  # 30th-percentile floor
df['bubble_size'] = df[bubble_size_base].clip(lower=lower_bound)

# Whole-sample axis limits keep the axes fixed while the animation plays.
x_min, x_max = df[xcol].min(), df[xcol].max()
y_min, y_max = df[ycol].min(), df[ycol].max()

fig = px.scatter(
    df,
    x=xcol,
    y=ycol,
    color='sic1',
    size='bubble_size',
    size_max=50,
    hover_name='conm',
    animation_frame='fyear',
    animation_group='conm',
    range_x=[x_min, x_max],
    range_y=[y_min, y_max],
    log_x=True,  # Log transformation for x
    log_y=True,  # Log transformation for y
    height=600,
    width=800,
)
fig.update_layout(
    title=dict(
        text=f"Dynamic Scatter Plots: {xcol} & {ycol} with {bubble_size_base} as a bubble size",
        x=0.5,
        xanchor='center',
        yanchor='top',
    )
)
fig.show()
# Dash app: the animated mv/at bubble chart with a dropdown that selects the
# x-axis variable.
# Imported in this cell so it runs on its own; these names are otherwise only
# imported by a later cell.
from dash import dcc, html, Input, Output
from jupyter_dash import JupyterDash

# Dropdown label -> column of `df` plotted on the x-axis.
X_COLUMN_BY_LABEL = {'asset': 'at', 'sale': 'sale'}

app = JupyterDash(__name__)
app.layout = html.Div([
    html.H4('TITLE'),
    # The id lets the callback below read the selected value.
    dcc.Dropdown(['asset', 'sale'], id='x-dropdown'),
    dcc.Graph(id='scatter-plot'),
    html.P("Select X:")
])


@app.callback(
    Output("scatter-plot", "figure"),
    Input("x-dropdown", "value"),
)
def scatter_plot(slider_range):
    """Build the animated bubble chart shown in the 'scatter-plot' graph.

    Args:
        slider_range: Current value of the 'x-dropdown' component
            ('asset', 'sale', or None when nothing is selected). The
            parameter name is kept from the original cell.

    Returns:
        A plotly figure with one animation frame per fiscal year. The x-axis
        column is looked up from the dropdown value and falls back to 'mv'
        when there is no selection or the value is unknown.
    """
    xcol = X_COLUMN_BY_LABEL.get(slider_range, 'mv')
    ycol = 'at'

    # Define size variable to be used as a bubble size.
    bubble_size_base = 'mv'
    size_floor = df[bubble_size_base].quantile(0.3)
    # assign() returns a new frame, so the callback does not modify the
    # notebook-level `df` each time it fires.
    plot_df = df.assign(bubble_size=df[bubble_size_base].clip(lower=size_floor))

    fig = px.scatter(
        plot_df, x=xcol, y=ycol,
        animation_frame='fyear',
        animation_group='conm',
        height=600, width=800,
        # Whole-sample ranges keep the axes fixed across animation frames.
        range_x=[plot_df[xcol].min(), plot_df[xcol].max()],
        range_y=[plot_df[ycol].min(), plot_df[ycol].max()],
        size='bubble_size',
        size_max=50,
        log_x=True,  # Log transformation for x
        log_y=True,  # Log transformation for y
        color='sic1',
        hover_name='conm',
    )
    return fig


app.run_server(mode='inline')
from dash import Dash, dcc, html, Input, Output
import plotly.express as px
from jupyter_dash import JupyterDash
# Minimal Dash example: an Iris scatter plot filtered by a petal-width
# range slider.
app = JupyterDash(__name__)

petal_width_slider = dcc.RangeSlider(
    id='range-slider',
    min=0, max=2.5, step=0.1,
    marks={0: '0', 2.5: '2.5'},
    value=[0.5, 2]
)
app.layout = html.Div(children=[
    html.H4('Interactive scatter plot with Iris dataset'),
    dcc.Graph(id="scatter-plot"),
    html.P("Filter by petal width:"),
    petal_width_slider,
])


@app.callback(
    Output("scatter-plot", "figure"),
    Input("range-slider", "value"))
def update_bar_chart(slider_range):
    """Redraw the scatter plot for the selected petal-width range.

    Args:
        slider_range: Two-element [low, high] value of the range slider.

    Returns:
        A plotly scatter figure of the Iris rows whose petal width lies
        strictly between low and high.
    """
    iris = px.data.iris()  # replace with your own data source
    lower, upper = slider_range
    petal_width = iris['petal_width']
    selected = iris[(petal_width > lower) & (petal_width < upper)]
    return px.scatter(
        selected,
        x="sepal_width",
        y="sepal_length",
        color="species",
        size='petal_length',
        hover_data=['petal_width'],
    )


app.run_server(mode='inline')